Load packages and data

library(tidyverse)
library(ggplot2)
library(ggthemes)
library(extrafont) # loadfonts(device = "win")
library(plotly)
library(DT)
# Read the survey extract; the first row of the file supplies the column names.
survey <- readr::read_csv(file = "survey_SCF.txt", col_names = TRUE)

1. Debt over time

# Per-year totals of income and each debt measure over all rows of `survey`.
byyear <- survey %>%
  select(
    YEAR, INCOME, NH_MORT, OTHLOC, CCBAL,
    INSTALL, EDN_INST, VEH_INST, TPAY
  ) %>%
  group_by(YEAR) %>%
  summarise_all(.funs = sum)

# Per-year means of the same columns.
avgbyyear <- survey %>%
  select(
    YEAR, INCOME, NH_MORT, OTHLOC, CCBAL,
    INSTALL, EDN_INST, VEH_INST, TPAY
  ) %>%
  group_by(YEAR) %>%
  summarise_all(.funs = mean)
# Connected line chart of the yearly EDN_INST total from `byyear`.
# EDN_INST is divided by one million so the y axis reads in millions, and
# `group = 1` joins the points into one line even though x is a factor.
chart1 <- ggplot(
  data = byyear,
  mapping = aes(x = factor(YEAR), y = EDN_INST / 1e6, group = 1)
) +
  geom_line() +
  geom_point(size = 4, shape = 23, fill = "steelblue4") +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "Total Student Debt over the Years",
    subtitle = "This simple connected line chart is effective in helping draw attention to the dramatic increase 
       of student loan debt over the survey years.",
    x = "Years",
    y = "Education Loans ($ in the Millions)"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  )

chart1

# Stacked bar chart of the yearly mean of four debt columns from `avgbyyear`.
# The four columns are first reshaped to long form (loan_type / loan_amount)
# so that loan_type can drive the fill. Amounts are shown in thousands.
chart2 <- avgbyyear %>%
  gather(
    key = loan_type,
    value = loan_amount,
    c(NH_MORT, VEH_INST, EDN_INST, CCBAL)
  ) %>%
  ggplot(
    aes(
      fill = loan_type,
      x = factor(YEAR),
      y = loan_amount / 1000,
      label = loan_type
    )
  ) +
  geom_bar(stat = "identity", position = "stack") +
  # Legend labels are applied positionally to the alphabetically ordered
  # loan_type values: CCBAL, EDN_INST, NH_MORT, VEH_INST.
  scale_fill_economist(
    name = "Loan Types",
    labels = c("Credit Cards", "Education", "Mortgage", "Vehicle")
  ) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "Amount of Debt for the Average of Person",
    subtitle = "A stacked bar chart is helpful to see the ratio of different debt for an average person and how it changes 
       over the years. ",
    x = "Years",
    y = "Total Debt ($ in the thousands)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  )

chart2

2. Tell me who you are

# Rows of the 2016 survey wave; reused by the charts that follow.
survey_2016 <- survey %>%
  filter(YEAR == 2016)

# Box plot (with jittered raw points) of INCOME by education class for one
# demographic slice of the 2016 data.
# NOTE(review): the three x-axis labels are applied positionally, so this
# presumably expects exactly three EDCL values to survive the filter - confirm.
chart3 <- survey_2016 %>%
  filter(AGECL == 1, HHSEX == 2, FAMSTRUCT == 2, RACE == 5) %>%
  ggplot(aes(x = factor(EDCL), y = INCOME, fill = factor(EDCL))) +
  geom_boxplot() +
  geom_jitter(color = "slategray", size = 0.4, alpha = 0.9) +
  scale_fill_economist() +
  scale_x_discrete(labels = c("HS/GED", "Some College", "College")) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "What if I didn't go to college?",
    subtitle = "I am interested in looking at the income and education of people similar to me in this chart. Therefore, I 
       filtered for my demographics and then feed it into a boxplot. Boxplots with jitter points are good in this 
       situation because it helps to see the full distribution.",
    caption = "Year: 2016",
    y = "Annual Income ($)",
    x = NULL
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  )

chart3

# Jittered scatter plot of education-loan debt (in thousands) against the
# KIDS column for the 2016 rows, coloured by education class.
#
# Only rows whose FAMSTRUCT is 1, 4 or 5 are kept. The membership test must be
# written with %in%: `filter(FAMSTRUCT == 1, 4, 5)` hands the bare numbers 4
# and 5 to filter() as extra conditions instead of comparing them with
# FAMSTRUCT.
chart4 <- survey_2016 %>%
  filter(FAMSTRUCT %in% c(1, 4, 5)) %>%
  ggplot() +
  aes(x = factor(KIDS), y = EDN_INST/1000, color = factor(EDCL)) +
  geom_jitter() +
  labs(title ="Family and Student Loans in 2016", 
  subtitle = "Using a scatter plot is dynamic visually in this situation because it highlights the density of the X variable. 
       Having the third dimension as color helps further separate the loan amount per Education level 
       for the reader.",
       x = "Amount of Kids", 
  y = "Student Loan Debt ($ in the thousands)") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  ) +
  # Legend labels are applied positionally to the sorted EDCL values.
  scale_color_economist(
    name = "Education Level",
    labels = c("NO HS/GED", "HS/GED", "Some College", "College")
  ) +
  scale_y_continuous(labels = scales::comma_format())

chart4

3. Wealth and Income Distribution

# Mean of income and each debt measure for every NWCAT x EDCL combination
# in the 2016 rows.
avgedu_survey <- survey_2016 %>%
  select(
    INCOME, NH_MORT, OTHLOC, CCBAL, INSTALL,
    EDN_INST, VEH_INST, TPAY, NWCAT, EDCL
  ) %>%
  group_by(NWCAT, EDCL) %>%
  summarise_all(.funs = mean)

# Grouped (dodged) bar chart of mean EDN_INST per net-worth category, with one
# bar per education class.
chart5 <- ggplot(
  data = avgedu_survey,
  mapping = aes(x = factor(NWCAT), y = EDN_INST, fill = factor(EDCL))
) +
  geom_bar(stat = "identity", position = "dodge") +
  # Fill and x labels are both applied positionally to the sorted codes.
  scale_fill_economist(
    name = "Education Level",
    labels = c("NO HS/GED", "HS/GED", "Some College", "College")
  ) +
  scale_x_discrete(
    labels = c("0-24.9", "25-49.9", "50-74.9", "75-89.9", "90-100")
  ) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "Changes in Average Student Loans by Education in 2016",
    subtitle = "Group bar charts are helpful here in that they help to easily put the households and Education Levels into data 
       bins. We see that the lower class bears the majority of the burden when it comes to student loan debt. ",
    x = "Household Net Worth Percentile",
    y = "Student Loans ($)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  )

chart5

# Mean of income and each debt measure for every NWCAT x RACE combination in
# the 2016 rows. Not referenced by the chart below.
avgage_survey <- survey_2016 %>%
  select(
    INCOME, NH_MORT, OTHLOC, CCBAL, INSTALL,
    EDN_INST, VEH_INST, TPAY, NWCAT, RACE
  ) %>%
  group_by(NWCAT, RACE) %>%
  summarise_all(.funs = mean)

# Bubble chart of the raw 2016 rows: education loans on x, age on y, bubble
# size from INCCAT and colour from EDCL. A low alpha keeps overlapping points
# readable.
chart6 <- ggplot(
  data = survey_2016,
  mapping = aes(
    x = EDN_INST,
    y = AGE,
    size = INCCAT,
    color = factor(EDCL)
  )
) +
  geom_point(alpha = .1) +
  # NOTE(review): the six size labels are matched to the scale's default
  # breaks; this presumably relies on INCCAT spanning six values - confirm.
  scale_size(
    name = "Income Percentile",
    range = c(1, 10),
    labels = c("0-20", "20-39.9", "40-59.9", "60-79.9", "80-89.9", "90-100")
  ) +
  scale_color_economist(
    name = "Education Level",
    labels = c("NO HS/GED", "HS/GED", "Some College", "College")
  ) +
  scale_x_continuous(labels = scales::comma_format()) +
  labs(
    title = "Age, Wealth, and Education in 2016",
    subtitle = "A 4 dimensional bubble chart might be overwhelming initially to the readers, but it contains a wealth information. 
       It shows concentration of loans for 3 different categorical variables. I tried to lessen the initial density of the 
       materials by choosing a very minimal theme and playing around with the transparency
       ",
    x = "Education Loans ($)",
    y = "Age"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  )

chart6

4. Going broke

# Yearly mean education loans and income over the rows flagged
# BNKRUPLAST5 == 1.
broke_survey <- survey %>%
  filter(BNKRUPLAST5 == 1) %>%
  select(YEAR, EDN_INST, INCOME) %>%
  group_by(YEAR) %>%
  summarise_all(.funs = mean)

# Dual-axis line chart. INCOME is divided by 10 so it shares the primary axis
# with EDN_INST; the secondary axis multiplies by 10 to show income again in
# its original units.
chart7 <- ggplot(data = broke_survey, mapping = aes(x = YEAR)) +
  geom_line(aes(y = EDN_INST, color = "Education Loans")) +
  geom_line(aes(y = INCOME / 10, color = "Income")) +
  scale_y_continuous(
    name = "Student Loans ($)",
    labels = scales::comma_format(),
    sec.axis = sec_axis(
      ~ . * 10,
      name = "Income ($)",
      labels = scales::comma_format()
    )
  ) +
  scale_colour_economist(name = NULL) +
  labs(
    title = "Factors of Bankruptcy",
    subtitle = "To examine student loans as a factor of bankruptcy, I harnessed a dual Y-axis to show how the two variables 
       interact with one another. However, having two different Y-axis values can be confusing for readers 
       who are not graphically inclined. This type of graph is best for a more knowledgeable audience.",
    x = "Year"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray"),
    legend.position = "bottom",
    axis.text.y = element_text(size = 8, margin = margin(0, 0, 0, 10)),
    axis.text.y.right = element_text(size = 8, margin = margin(0, 20, 0, 0))
  )

chart7

# Reshape the three food-spending columns to long form: one row per original
# row and food type, with the amount in food_amount.
bankrupt_food <- survey %>%
  gather(food_type, food_amount, c(FOODAWAY, FOODHOME, FOODDELV))

# Label the bankruptcy flag by value rather than by position. as.factor()
# sorts the levels as 0, 1, so assigning the labels positionally would attach
# "Declared Bankruptcy*" to the 0 rows. Naming the levels explicitly ties
# "Declared Bankruptcy*" to 1, matching the BNKRUPLAST5 == 1 filter used for
# broke_survey, and keeps "Declared" as the first level (and first facet).
# Assumes BNKRUPLAST5 only takes the values 0 and 1; anything else becomes NA.
bankrupt_food$BNKRUPLAST5 <- factor(
  bankrupt_food$BNKRUPLAST5,
  levels = c(1, 0),
  labels = c("Declared Bankruptcy*", "Has Not Declared Bankruptcy*")
)
levels(bankrupt_food$BNKRUPLAST5)
## [1] "Declared Bankruptcy*"         "Has Not Declared Bankruptcy*"

# Point chart of food spending by food type, one facet per bankruptcy status.
chart8 <- bankrupt_food %>%
  ggplot() +
  aes(x = food_type, y = food_amount, color = BNKRUPLAST5) +
  geom_point(size = 3) +
  facet_wrap(~BNKRUPLAST5) +
  scale_color_economist() +
  theme_minimal() +
  theme(
    legend.position = "none",
    text = element_text(size = 11, family = "Garamond", color = "grey20"),
    plot.title = element_text(size = 16, face = "bold", color = "slategray")
  ) +
  labs(
    title = "Food and Bankruptcy",
    subtitle = "This facet wrap chart shows the differences in spending on food between does who has and has not declared 
       bankruptcy in the past 5 years. Seeing the two next to each other help visually  when we are trying to 
       compare two types.",
    x = "Food Type",
    y = "Food Amount ($)",
    caption = "*In the past 5 years"
  ) +
  # x labels are applied positionally to the alphabetically ordered food_type
  # values: FOODAWAY, FOODDELV, FOODHOME.
  scale_x_discrete(labels = c("Away from Home", "Delivery", "At Home")) +
  scale_y_continuous(labels = scales::comma_format())
chart8

5. Make two plots interactive

The “Family and Student Loans” and the “Average Amount of Debt” charts are good candidates for interactivity because both contain dense, stacked information. In the scatter plot, many points lie on top of one another; with interactivity, the reader can inspect each point individually. In the stacked bar chart, reading off an amount is hard because each category starts at a different baseline; with interactivity, the reader can simply hover over a segment. However, my labels and some other customizations in my ggplot2 code did not carry over when the charts were converted to Plotly. Plotly is nevertheless a nice beginner tool and is widely used by the public, so it’s worth considering for visualization projects.

# Convert the scatter plot and the stacked bar chart to interactive widgets.
plotly::ggplotly(p = chart4)
plotly::ggplotly(p = chart2)

6. Data Table

I decided to make an interactive Data Table that focuses on demographic variables by year so that readers can easily filter the data for themselves. Data Tables can be overwhelming at times, which is why I chose a simple formatting style and only five variables of interest, keeping the table easy to navigate.

# Sum INCOME and DEBT for every YEAR / AGE / EDUCATION combination; EDUC is
# renamed to EDUCATION as part of the column selection.
# This object shares its name with DT::datatable(), so the widget call at the
# end of this chunk names the function explicitly.
datatable <- survey %>%
  select(YEAR, AGE, EDUCATION = EDUC, INCOME, DEBT) %>%
  group_by(YEAR, AGE, EDUCATION) %>%
  summarise_all(.funs = sum)

# Turn the education codes into a factor and relabel its levels positionally.
# NOTE(review): this presumably expects exactly 15 distinct codes whose sorted
# order matches the labels below - confirm against the codebook.
datatable[["EDUCATION"]] <- as.factor(datatable[["EDUCATION"]])
levels(datatable[["EDUCATION"]]) <- c(
  "LESS THAN 1ST GRADE",
  "1ST, 2ND, 3RD, OR 4TH GRADE",
  "5TH OR 6TH GRADE",
  "7TH OR 8TH GRADE",
  "9TH GRADE",
  "10TH GRADE",
  "11TH GRADE",
  "12TH GRADE, NO DIPLOMA",
  "HIGH SCHOOL GRADUATE - HIGH SCHOOL DIPLOMA OR EQUIVALENT",
  "SOME COLLEGE BUT NO DEGREE",
  "ASSOCIATE DEGREE IN COLLEGE - OCCUPATION/VOCATION PROGRAM",
  "ASSOCIATE DEGREE IN COLLEGE - ACADEMIC PROGRAM",
  "BACHELOR'S DEGREE (FOR EXAMPLE: BA, AB, BS)",
  "MASTER'S DEGREE",
  "DOCTORATE OR PROFESSIONAL SCHOOL DEGREE"
)
levels(datatable[["EDUCATION"]])
##  [1] "LESS THAN 1ST GRADE"                                      
##  [2] "1ST, 2ND, 3RD, OR 4TH GRADE"                              
##  [3] "5TH OR 6TH GRADE"                                         
##  [4] "7TH OR 8TH GRADE"                                         
##  [5] "9TH GRADE"                                                
##  [6] "10TH GRADE"                                               
##  [7] "11TH GRADE"                                               
##  [8] "12TH GRADE, NO DIPLOMA"                                   
##  [9] "HIGH SCHOOL GRADUATE - HIGH SCHOOL DIPLOMA OR EQUIVALENT" 
## [10] "SOME COLLEGE BUT NO DEGREE"                               
## [11] "ASSOCIATE DEGREE IN COLLEGE - OCCUPATION/VOCATION PROGRAM"
## [12] "ASSOCIATE DEGREE IN COLLEGE - ACADEMIC PROGRAM"           
## [13] "BACHELOR'S DEGREE (FOR EXAMPLE: BA, AB, BS)"              
## [14] "MASTER'S DEGREE"                                          
## [15] "DOCTORATE OR PROFESSIONAL SCHOOL DEGREE"

# Render the summary as an interactive table with per-column filters on top,
# a relabelled search box, and a highlighted YEAR column.
DT::datatable(
  datatable,
  rownames = FALSE,
  colnames = colnames(datatable),
  filter = list(position = "top"),
  options = list(language = list(sSearch = "Filter:"))
) %>%
  formatStyle(
    "YEAR",
    color = "white",
    backgroundColor = "darkblue",
    fontWeight = "bold"
  )